{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyONPJCAwPx0Wq0HDbOoi4Tk",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/sugarforever/LangChain-Tutorials/blob/main/LangChain_Output_Parsing.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "In this notebook, we will learn how to use LangChain's output parser to process LLM output in a more programming language friendly way."
      ],
      "metadata": {
        "id": "UV1rzv4pfn8o"
      }
    },
    {
      "cell_type": "code",
      "execution_count": 38,
      "metadata": {
        "id": "3scUGKX6fh94"
      },
      "outputs": [],
      "source": [
        "!pip install langchain openai -qU"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# CommaSeparatedListOutputParser"
      ],
      "metadata": {
        "id": "8wqIvBtegME0"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from langchain.output_parsers import CommaSeparatedListOutputParser\n",
        "from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate\n",
        "from langchain.llms import OpenAI\n",
        "from langchain.chat_models import ChatOpenAI"
      ],
      "metadata": {
        "id": "hLxERwUAgI02"
      },
      "execution_count": 39,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "OPENAI_API_KEY = 'your openai api key here'"
      ],
      "metadata": {
        "id": "xaXmIOlUh7du"
      },
      "execution_count": 37,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "output_parser = CommaSeparatedListOutputParser()\n",
        "format_instructions = output_parser.get_format_instructions()\n",
        "prompt = PromptTemplate(\n",
        "    template=\"List 3 main-stream {subject}.\\n{format_instructions}\",\n",
        "    input_variables=[\"subject\"],\n",
        "    partial_variables={\"format_instructions\": format_instructions}\n",
        ")"
      ],
      "metadata": {
        "id": "dA7lnAUkg0O0"
      },
      "execution_count": 46,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(format_instructions)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "kISdCOOus7dK",
        "outputId": "811f281a-f567-42de-8bb0-22b63409d206"
      },
      "execution_count": 47,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Your response should be a list of comma separated values, eg: `foo, bar, baz`\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "llm = OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)"
      ],
      "metadata": {
        "id": "qiIwAWalgZKJ"
      },
      "execution_count": 48,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "_input = prompt.format(subject=\"music styles\")\n",
        "output = llm(_input)\n"
      ],
      "metadata": {
        "id": "dYZdV_wHgvcd"
      },
      "execution_count": 52,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(output)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "9MHBQyaihwdI",
        "outputId": "93e41b4d-23aa-40d1-ba5a-770522047737"
      },
      "execution_count": 53,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "\n",
            "Pop, Rock, Hip-Hop\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "output_parser.parse(output)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "ffPXRbX-h2Lt",
        "outputId": "f6bf11f2-66b8-4ff1-f724-d44bfc08a1db"
      },
      "execution_count": 54,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "['Pop', 'Rock', 'Hip-Hop']"
            ]
          },
          "metadata": {},
          "execution_count": 54
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# EnumOutputParser"
      ],
      "metadata": {
        "id": "t9M4boioiSAa"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from langchain.output_parsers.enum import EnumOutputParser"
      ],
      "metadata": {
        "id": "vRqHTEYliXUb"
      },
      "execution_count": 55,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from enum import Enum\n",
        "\n",
        "class Genders(Enum):\n",
        "    MALE = \"male\"\n",
        "    FEMALE = \"female\""
      ],
      "metadata": {
        "id": "W08I3jHDicmR"
      },
      "execution_count": 56,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "output_parser = EnumOutputParser(enum=Genders)\n",
        "format_instructions = output_parser.get_format_instructions()\n",
        "prompt = PromptTemplate(\n",
        "    template=\"Tell me the gender of the celebrity {name}.\\n{format_instructions}\",\n",
        "    input_variables=[\"name\"],\n",
        "    partial_variables={\"format_instructions\": format_instructions}\n",
        ")"
      ],
      "metadata": {
        "id": "XelNJSDJilxD"
      },
      "execution_count": 57,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(format_instructions)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "arF_9ckdw9TK",
        "outputId": "47e54ba9-2e34-4f7f-fc60-89e870ce2c92"
      },
      "execution_count": 58,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Select one of the following options: male, female\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "_input = prompt.format(name=\"Michael Jordan\")\n",
        "output = llm(_input)\n",
        "print(output)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "o-jdUoa3iyda",
        "outputId": "04aeb640-93f9-4d7f-ac99-6c0d7e75c520"
      },
      "execution_count": 59,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "male\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "output_parser.parse(output)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "u_97GW_GjBNQ",
        "outputId": "e6ae8ed1-a61a-4731-dc59-5614b2278828"
      },
      "execution_count": 60,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<Genders.MALE: 'male'>"
            ]
          },
          "metadata": {},
          "execution_count": 60
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "output_parser.parse('xyz')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 452
        },
        "id": "5lUJm3mSjGuP",
        "outputId": "b29b0cf1-ecc1-4f00-db73-78e5e672136f"
      },
      "execution_count": 61,
      "outputs": [
        {
          "output_type": "error",
          "ename": "OutputParserException",
          "evalue": "ignored",
          "traceback": [
            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
            "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain/output_parsers/enum.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(self, response)\u001b[0m\n\u001b[1;32m     24\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     26\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/lib/python3.10/enum.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(cls, value, names, module, qualname, type, start)\u001b[0m\n\u001b[1;32m    384\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mnames\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m  \u001b[0;31m# simple value lookup\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 385\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__new__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcls\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    386\u001b[0m         \u001b[0;31m# otherwise, functional API: we're creating a new Enum type\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/lib/python3.10/enum.py\u001b[0m in \u001b[0;36m__new__\u001b[0;34m(cls, value)\u001b[0m\n\u001b[1;32m    709\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mexc\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 710\u001b[0;31m                     \u001b[0;32mraise\u001b[0m \u001b[0mve_exc\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    711\u001b[0m                 \u001b[0;32melif\u001b[0m \u001b[0mexc\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;31mValueError\u001b[0m: 'xyz' is not a valid Genders",
            "\nDuring handling of the above exception, another exception occurred:\n",
            "\u001b[0;31mOutputParserException\u001b[0m                     Traceback (most recent call last)",
            "\u001b[0;32m<ipython-input-61-ac45724e675a>\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0moutput_parser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'xyz'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain/output_parsers/enum.py\u001b[0m in \u001b[0;36mparse\u001b[0;34m(self, response)\u001b[0m\n\u001b[1;32m     25\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstrip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     26\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 27\u001b[0;31m             raise OutputParserException(\n\u001b[0m\u001b[1;32m     28\u001b[0m                 \u001b[0;34mf\"Response '{response}' is not one of the \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     29\u001b[0m                 \u001b[0;34mf\"expected values: {self._valid_values}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;31mOutputParserException\u001b[0m: Response 'xyz' is not one of the expected values: ['male', 'female']"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# StructuredOutputParser"
      ],
      "metadata": {
        "id": "VgYMfxcYjQ4E"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from langchain.output_parsers import StructuredOutputParser, ResponseSchema"
      ],
      "metadata": {
        "id": "bspETWUdjSnv"
      },
      "execution_count": 62,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "response_schemas = [\n",
        "    ResponseSchema(name=\"answer\", description=\"answer to the human's question\"),\n",
        "    ResponseSchema(name=\"source\", description=\"source used to answer the human's question, should be a website.\")\n",
        "]\n",
        "output_parser = StructuredOutputParser.from_response_schemas(response_schemas)"
      ],
      "metadata": {
        "id": "pdozG2V4jcTO"
      },
      "execution_count": 63,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "format_instructions = output_parser.get_format_instructions()\n",
        "prompt = PromptTemplate(\n",
        "    template=\"answer the users question as best as possible.\\n{format_instructions}\\n{question}\",\n",
        "    input_variables=[\"question\"],\n",
        "    partial_variables={\"format_instructions\": format_instructions}\n",
        ")"
      ],
      "metadata": {
        "id": "vMp2DJvJjpP9"
      },
      "execution_count": 64,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(format_instructions)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "YHlRmZF3yF5C",
        "outputId": "72dcd806-df4e-40fb-f30d-5648749f1a5e"
      },
      "execution_count": 65,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "The output should be a markdown code snippet formatted in the following schema, including the leading and trailing \"```json\" and \"```\":\n",
            "\n",
            "```json\n",
            "{\n",
            "\t\"answer\": string  // answer to the human's question\n",
            "\t\"source\": string  // source used to answer the human's question, should be a website.\n",
            "}\n",
            "```\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "_input = prompt.format_prompt(question=\"what are the ingredients of milk?\")\n",
        "output = llm(_input.to_string())\n",
        "\n",
        "print(output)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "jdfxd85_jtil",
        "outputId": "a6bbc547-03c2-459b-bdab-8fbfced2e667"
      },
      "execution_count": 66,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "\n",
            "```json\n",
            "{\n",
            "\t\"answer\": \"Milk is typically made up of water, fat, proteins, lactose (sugar) and minerals.\",\n",
            "\t\"source\": \"https://www.dairynz.co.nz/nutrition/what-is-in-milk/\"\n",
            "}\n",
            "```\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "json_data = output_parser.parse(output)"
      ],
      "metadata": {
        "id": "VHcXhoiUj1aG"
      },
      "execution_count": 68,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(json_data['answer'])"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "n4cb1BQ2ypHU",
        "outputId": "7c9c99cf-bb3c-47a3-b7ec-c155e047e2aa"
      },
      "execution_count": 69,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Milk is typically made up of water, fat, proteins, lactose (sugar) and minerals.\n"
          ]
        }
      ]
    }
  ]
}